0. Project Setup¶
0.1 Packages & Device¶
# Torch
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader, Subset, random_split
from tqdm import tqdm
# Augmentation
import albumentations as A
from albumentations.pytorch import ToTensorV2
import scipy.io as sio
# Visualize Result
from sklearn.metrics import (confusion_matrix, accuracy_score,
precision_score, recall_score,
f1_score, roc_auc_score,
roc_curve, auc, precision_recall_curve,
average_precision_score)
from sklearn.metrics import ConfusionMatrixDisplay
from sklearn.preprocessing import label_binarize
# Basic
import numpy as np
import cv2
import os
import time
from typing import List, Tuple, Union, Dict, OrderedDict as TypingOrderedDict, Optional, Any
from collections import OrderedDict
import random
import itertools
import copy
# Select the compute device: prefer CUDA when available, fall back to CPU.
if torch.cuda.is_available():
    device_name = "cuda"
else:
    device_name = "cpu"
device = torch.device(device_name)
print(f"Using device: {device_name}")
Using device: cuda
0.2 Global Configurations¶
# Root directory holding the SVHN .mat files (train_32x32.mat / test_32x32.mat).
path_dataset = "./data/SVHN_mat"
1. Data Processing and Augmentation¶
1.1 Download Datasets¶
Define dataset class, retrieve dataset.
1.1 Notes¶
# Load the raw training split and inspect its layout.
_dat = sio.loadmat(os.path.join(path_dataset, "train_32x32.mat"))
# _dat['X'][0][0][0]
# np.array(_dat).shape
# Raw X is laid out (H, W, C, N); move the sample axis first -> (N, 32, 32, 3).
dat = np.transpose(_dat['X'], (3, 0, 1, 2))
# dat = dat.astype(np.float32)
dat.shape
# First Image
dat[0].shape
# First Row of Image
dat[0][0]
# First Pixel of Image
dat[0][0][0]
# Sanity-check an albumentations pipeline: Normalize with per-channel stats,
# then ToTensorV2 converts HWC uint8 -> CHW float tensor.
_transform = A.Compose([
    A.Normalize(mean=[0.4376845359802246, 0.4437684714794159, 0.47280389070510864], std=[0.19803018867969513, 0.2010156661272049, 0.19703581929206848]),
    ToTensorV2()
])
_img = dat[0]
_img = _transform(image=_img)['image']
# print(_img)
_img.shape
1.2 Dataset¶
class SVHNDataset(Dataset):
    """SVHN digit dataset backed by the Stanford .mat files.

    The raw files use label 10 for the digit zero; this class remaps it
    to 0 so labels are the plain digits 0-9.
    """

    def __init__(self, mat_file, transform=None):
        raw = sio.loadmat(mat_file)
        # Raw X is (H, W, C, N); move the sample axis to the front.
        self.images = np.transpose(raw['X'], (3, 0, 1, 2))
        self.labels = raw['y'].flatten()
        self.labels[self.labels == 10] = 0
        # May stay None for now; a transform must be injected before use.
        self.transform = transform

    def __len__(self):
        return len(self.labels)

    def __getitem__(self, idx):
        # The transform is mandatory: it converts the (32, 32, 3) uint8
        # array into a float Tensor of shape (3, 32, 32).
        if self.transform is None:
            raise ValueError("CISC3024 Custom Error: The transform should not be None when this object is passed into a DataLoader.")
        sample = self.transform(image=self.images[idx])['image']
        return sample, self.labels[idx]

    def get_meanstd(self, contrast_factor=None, random_seed=114514):
        """Per-channel (mean, std) of the images scaled to [0, 1].

        When `contrast_factor` is given, every image is first scaled by a
        seeded random factor drawn from [1/contrast_factor, contrast_factor].
        """
        if contrast_factor is not None:
            random.seed(random_seed)
            factors = [random.uniform(1 / contrast_factor, contrast_factor)
                       for _ in range(len(self.images))]
            pixels = np.array([
                np.clip(img * cf, 0, 255).astype(np.uint8)
                for img, cf in zip(self.images, factors)
            ])
        else:
            pixels = self.images
        pixels = pixels / 255.0
        channel_mean = np.mean(pixels, axis=(0, 1, 2))
        channel_std = np.std(pixels, axis=(0, 1, 2), ddof=0)
        return channel_mean.tolist(), channel_std.tolist()

    def overwrite(self, indices: Union[list, np.ndarray]):
        """
        Create a deep copy of the mother dataset instance and only keep the wanted
        data samples, controlled by indices.
        """
        upper = len(self.labels)
        if any(not 0 <= idx < upper for idx in indices):
            raise IndexError("CISC3024 Custom Error: One or more indices are out of bounds.")
        clone = copy.deepcopy(self)
        clone.images = self.images[indices]
        clone.labels = self.labels[indices]
        return clone
1.2 Peek At Data¶
def peek(dataset, index=None):
    """Visualize a strip of consecutive samples from `dataset`.

    Starts at `index` (random when None) and shows the next 6 images,
    wrapping around the end of the dataset.
    """
    def unnormalize(img, mean, std):
        """Revert the normalization for visualization."""
        img = img * std + mean
        return np.clip(img, 0, 1)
    mean, std = dataset.get_meanstd()
    # Plotting multiple images in a grid
    grid_rows, grid_cols = 1, 6
    fig, axes = plt.subplots(grid_rows, grid_cols, figsize=(6, 6))
    peek_index = random.randint(0, len(dataset) - 1) if index is None else index
    for i in range(grid_cols):
        # Bug fix: advance the index per subplot instead of re-plotting the
        # same sample six times; wrap with modulo to stay in bounds.
        sample_index = (peek_index + i) % len(dataset)
        img_tensor, label = dataset[sample_index]
        img = img_tensor.permute(1, 2, 0).numpy()  # (C, H, W) -> (H, W, C)
        img = unnormalize(img, mean, std)
        ax = axes[i]
        ax.imshow(img)
        ax.set_title(f"Label: {label}")
    plt.tight_layout()
    plt.show()
    # Typo fix in message: "Tnesor" -> "Tensor".
    print(f"Peeking data from training set of index {peek_index}.\nImage Tensor Size:{dataset[peek_index][0].shape}")
2. Neural Network¶
2.1 Model Structure¶
class SmallVGG(nn.Module):
    """Small VGG-style CNN for 10-class classification of 3-channel images.

    Three conv stages, each ending in a 2x2 max-pool, so an input of
    spatial size `frame_size` leaves the conv stack at frame_size // 8
    with 32 channels.
    """

    def __init__(self, frame_size=32):
        super(SmallVGG, self).__init__()
        self.frame_size = frame_size
        self.conv_layers = nn.Sequential(
            nn.Conv2d(3, 8, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(8, 16, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # frame_size / 2
            nn.Conv2d(16, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # frame_size / 4
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),  # frame_size / 8
        )
        # Bug fix: the flattened size is (final channels) * spatial^2.
        # The original `frame_size * 4 * 4` only worked because the final
        # channel count (32) happens to equal the default frame_size; any
        # other frame_size would crash in forward(). 512 for the default.
        flat_features = 32 * (frame_size // 8) ** 2
        self.fc_layers = nn.Sequential(
            nn.Linear(flat_features, 256),
            nn.ReLU(),
            nn.Linear(256, 10)
        )

    def forward(self, x):
        """Return raw class logits of shape (batch, 10)."""
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)  # flatten all but the batch dim
        x = self.fc_layers(x)
        return x
2.2 Train and Evaluate Function¶
def train_and_evaluate(model,
                       train_loader,
                       valid_loader,
                       criterion,
                       optimizer,
                       num_epochs=100,
                       stop_early_params=None):
    """Train `model` with `optimizer`, validating once per epoch.

    stop_early_params, when given, is a dict with "min_delta" and
    "patience": training stops after `patience` epochs without a
    validation-loss improvement of at least `min_delta`, and the best
    weights seen so far are restored into `model` in place.

    Returns (train_losses, valid_losses): per-epoch mean loss per SAMPLE.
    """
    # Record Losses to plot
    train_losses = []
    valid_losses = []
    # Early stop state
    best_state = None
    current_min_valid_loss = np.inf
    num_overfit_epochs = 0
    # Bug fix: the running sums below are weighted by batch size, so the
    # mean must divide by the number of samples, not the number of batches
    # (the original divided by len(loader), inflating the reported loss).
    num_train_samples = len(train_loader.dataset)
    num_valid_samples = len(valid_loader.dataset)
    for epoch in range(num_epochs):
        # --- Train ---
        model.train()
        running_loss = 0.0
        for images, labels in tqdm(train_loader):
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * len(images)
        train_losses.append(running_loss / num_train_samples)
        # --- Evaluate ---
        model.eval()
        valid_loss = 0.0
        with torch.no_grad():
            for images, labels in valid_loader:
                images, labels = images.to(device), labels.to(device)
                outputs = model(images)
                loss = criterion(outputs, labels)
                valid_loss += loss.item() * len(images)
        valid_losses.append(valid_loss / num_valid_samples)
        print(f"Epoch[{epoch+1}/{num_epochs}], Train Loss:{train_losses[-1]:.4f}, Validation Loss:{valid_losses[-1]:.4f}")
        # --- Early Stop? ---
        if stop_early_params is None:
            continue
        if current_min_valid_loss - stop_early_params["min_delta"] > valid_losses[-1]:  # Validation loss decreased
            current_min_valid_loss = valid_losses[-1]
            # Snapshot only the weights, not the whole model object.
            best_state = copy.deepcopy(model.state_dict())
            num_overfit_epochs = max(num_overfit_epochs - 1, 0)
        else:  # Validation loss did not improve enough
            num_overfit_epochs += 1
            if num_overfit_epochs > stop_early_params["patience"]:
                print(f"Early stopping at epoch {epoch+1}.")
                # Bug fix: the original rebound the local name `model`,
                # which never reached the caller; restore weights in place.
                if best_state is not None:
                    model.load_state_dict(best_state)
                break
    return train_losses, valid_losses
2.3 Get Predictions¶
Multiple functions are defined to evaluate data. Below is a list of them.
def get_predictions(model_path, extra_loader):
    """Run inference over `extra_loader` with a saved SmallVGG.

    `model_path` is either a filesystem path to a saved state dict or an
    already-loaded state dict. Returns (pred_scores, true_labels,
    pred_labels) as plain Python lists.
    """
    state = torch.load(model_path) if isinstance(model_path, str) else model_path
    net = SmallVGG()
    net.load_state_dict(state)
    net.to(device)
    net.eval()
    pred_scores = []   # softmax probabilities per sample
    true_labels = []   # ground-truth labels
    pred_labels = []   # argmax class per sample
    with torch.no_grad():
        for images, labels in tqdm(extra_loader):
            images, labels = images.to(device), labels.to(device)
            logits = net(images)
            probs = nn.functional.softmax(logits, dim=-1)
            pred_scores.extend(probs.cpu().tolist())
            pred_labels.extend(logits.argmax(dim=1).tolist())
            true_labels.extend(labels.cpu().tolist())
    return pred_scores, true_labels, pred_labels
2.4 Get Metrics¶
def get_metrics(true_labels, pred_labels):
    """Overall accuracy plus per-class precision/recall/F1 for classes 0-9.

    Precision/recall default to 1 on classes with no predictions, while F1
    defaults to 0, matching the original reporting convention.
    """
    class_labels = range(0, 10)
    accuracy = accuracy_score(true_labels, pred_labels)
    precision = precision_score(true_labels, pred_labels,
                                zero_division=1, average=None, labels=class_labels)
    recall = recall_score(true_labels, pred_labels,
                          zero_division=1, average=None, labels=class_labels)
    f1 = f1_score(true_labels, pred_labels,
                  zero_division=0, average=None, labels=class_labels)
    return accuracy, precision, recall, f1
def print_metrics(accuracies, f1s):
    """Pretty-print accuracies and per-class F1 lists, then report the run
    with the highest mean F1 (1-based index)."""
    print("Accuracies:")
    for accuracy in accuracies:
        print(f"{accuracy:.3f}", end=" ")
    print("\n")
    print("F1 Score Lists:")
    run_means = []
    for per_class in f1s:
        for score in per_class:
            print(f"{score:.3f}", end=" ")
        run_mean = np.mean(per_class)
        run_std = np.std(per_class)
        run_means.append(run_mean)
        print(f"| Avg F1={run_mean:.3f}, Std F1={run_std}")
    print(f"Best: {np.argmax(run_means)+1}-th")
# Compute ROC AUC for each class
def get_roc_auc(true_labels_bin, pred_labels_bin):
    """One-vs-rest ROC AUC per class (0-9).

    true_labels_bin: (N, 10) binarized ground-truth labels.
    pred_labels_bin: (N, 10) per-class prediction SCORES (despite the
        name, pass the softmax score matrix, not hard labels).

    Returns a dict mapping class index -> ROC AUC.
    """
    # Bug fix: the original read the module-level `pred_scores` variable
    # instead of its own second argument, so it only worked by accident
    # when that global happened to exist.
    scores = np.array(pred_labels_bin)
    roc_auc = dict()
    for i in range(0, 10):
        roc_auc[i] = roc_auc_score(true_labels_bin[:, i], scores[:, i])
    return roc_auc
3. Experiments¶
3.0 Preparation¶
3.0.1 Plot Functions¶
The experiments will be a list of the following structures:
{
"HYPER_PARAM_1": combo[0],
"HYPER_PARAM_2": combo[1],
"train_losses": train_losses,
"valid_losses": valid_losses,
"model_state_dict": exp_model.state_dict()
}
Epoch-Loss Curves¶
def plot_el(loaded_experiments, hyper_param_names, n_rows=4, n_cols=4):
    """Plot train/validation epoch-loss curves, one subplot per experiment.

    hyper_param_names is a pair of record keys used to label each subplot.
    """
    n1, n2 = hyper_param_names
    fig, axes = plt.subplots(nrows=n_rows, ncols=n_cols, figsize=(n_cols * 5, n_rows * 5))
    for idx, ax in enumerate(axes.flat):
        record = loaded_experiments[idx]
        tr, va = record["train_losses"], record["valid_losses"]
        ax.plot(tr, label=f"TRL, min={np.min(tr):.3f}")
        ax.plot(va, label=f"VAL, min={np.min(va):.3f} at step={np.argmin(va)}")
        ax.set_xlabel("Epochs")
        ax.set_ylabel("Loss")
        ax.set_title(f"{n1}={record[n1]}, {n2}={record[n2]}")
        ax.legend(loc="upper right")
    plt.show()
Get Experiment Results¶
def get_experiment_results(loaded_experiments, test_hyperparam_names, extra_loader):
    """Run inference for every stored experiment on `extra_loader`.

    For each experiment record, evaluates the saved state dict via
    get_predictions and collects the two hyper-parameter values, ground
    truth, predicted labels, and softmax scores. A short preview of the
    predictions is printed as a sanity check.

    NOTE(review): indentation was reconstructed from a flat export — the
    preview prints are assumed to run once per experiment; confirm
    against the original notebook.
    """
    experiment_results = []
    n1, n2 = test_hyperparam_names
    for i, exp in enumerate(loaded_experiments):
        pred_scores, true_labels, pred_labels = get_predictions(exp['model_state_dict'], extra_loader)
        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })
        # Preview the first few predictions of this run.
        print(f"First 10 true labels:")
        [print(num, end=" ") for num in true_labels[:10]]
        print(f"...\n")
        print(f"First 10 pred labels:")
        [print(num, end=" ") for num in pred_labels[:10]]
        print(f"...\n")
        print(f"First 5 pred_scores:")
        [print(num, end=" ") for num in pred_scores[:5]]
        print(f"...\n")
        # del pred_scores, true_labels, pred_lables
        # Free any cached GPU memory between runs.
        torch.cuda.empty_cache()
    return experiment_results
Confusion Matrix¶
def plot_cm(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    """Draw one confusion matrix per experiment in an n_rows x n_cols grid."""
    hparam_1, hparam_2 = hyper_param_names
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()
    for idx, exp_rs in enumerate(experiment_results):
        cm = confusion_matrix(exp_rs['true_labels'], exp_rs['pred_labels'])
        display = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=range(0, 10))
        display.plot(ax=axes[idx], cmap=plt.cm.Blues)
        axes[idx].set_title(f"Exp {idx+1}: {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
    plt.tight_layout()
    plt.show()
Precision-Recall Curve¶
def plot_pr(experiment_results, hyper_param_names, n_rows=4, n_cols=4):
    """Plot one-vs-rest precision-recall curves (classes 0-9), one subplot
    per experiment.

    Returns (accuracies, f1_scores): the overall accuracy and per-class F1
    array of each experiment, in input order.
    """
    fig, axes = plt.subplots(n_rows,n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()
    hparam_1, hparam_2 = hyper_param_names
    accuracies = []
    f1_scores = []
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_labels'], exp_rs['pred_scores']
        # One-hot encode labels so each class is a binary problem.
        true_labels_bin, pred_labels_bin = label_binarize(true_labels, classes=range(0,10)), label_binarize(pred_labels, classes=range(0,10))
        accuracy, precision, recall, f1 = get_metrics(true_labels, pred_labels)
        accuracies.append(accuracy)
        f1_scores.append(f1)
        for j in range(0, 10):
            # print(f"Class {j}: Prec:{precision[j]:.2f}, Recall:{recall[j]:.2f}, F_1 Score:{f1[j]:.2f}")
            # PR curve for class j uses the softmax score of column j.
            precision_i, recall_i, _ = precision_recall_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            average_precision = average_precision_score(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            axes[i].step(recall_i, precision_i, where="post", label=f"Class {j} AP={average_precision:.2f}")
        axes[i].set_title(f"PR-Curve {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend()
        axes[i].set_xlabel("Recall")
        axes[i].set_ylabel("Precision")
        # for j in range(i+1, 16):
        #     fig.delaxes(axes[j])
    plt.tight_layout()
    plt.show()
    return accuracies, f1_scores
ROC-AUC Curve¶
def plot_rocauc(experiment_results, hyper_param_names, curve_type, n_rows=4, n_cols=4):
    """Plot ROC curves for every experiment, one subplot each.

    curve_type: "all" plots the ten per-class one-vs-rest curves;
    "macro_micro" plots only the macro- and micro-averaged curves.
    """
    fig, axes = plt.subplots(n_rows, n_cols, figsize=(n_cols * 5, n_rows * 5))
    axes = axes.flatten()
    hparam_1, hparam_2 = hyper_param_names
    for i, exp_rs in enumerate(experiment_results):
        true_labels, pred_scores = exp_rs['true_labels'], exp_rs['pred_scores']
        true_labels_bin = label_binarize(true_labels, classes=range(0, 10))
        # All Classes' ROC curve & ROC Area Under Curve
        fpr = dict()
        tpr = dict()
        roc_auc = dict()
        for j in range(10):
            fpr[j], tpr[j], _ = roc_curve(true_labels_bin[:, j], np.array(pred_scores)[:, j])
            roc_auc[j] = auc(fpr[j], tpr[j])
        # Macro-Average ROC & ROC-AUC: interpolate every per-class TPR onto
        # the union of all FPR grid points, then average across classes.
        all_fpr = np.unique(np.concatenate([fpr[j] for j in range(10)]))
        mean_tpr = np.zeros_like(all_fpr)
        for j in range(10):
            mean_tpr += np.interp(all_fpr, fpr[j], tpr[j])
        mean_tpr /= 10
        fpr["macro"] = all_fpr
        tpr["macro"] = mean_tpr
        roc_auc["macro"] = auc(fpr["macro"], tpr["macro"])
        # Compute micro-average ROC curve and ROC area by pooling every
        # (sample, class) decision into one flat binary problem.
        fpr["micro"], tpr["micro"], _ = roc_curve(true_labels_bin.ravel(), np.array(pred_scores).ravel())
        roc_auc["micro"] = auc(fpr["micro"], tpr["micro"])
        # Plot only Macro or Micro ROC curves
        if curve_type == "macro_micro":
            axes[i].plot(fpr["macro"], tpr["macro"], label=f"Macro (AUC={roc_auc['macro']:.2f})")
            axes[i].plot(fpr["micro"], tpr["micro"], label=f"Micro (AUC={roc_auc['micro']:.2f})")
        elif curve_type == "all":
            # Plot all ROC curves
            for j in range(10):
                axes[i].plot(fpr[j], tpr[j], label=f"Class {j} (AUC={roc_auc[j]:.2f})")
        # Diagonal reference line: a random classifier.
        axes[i].plot([0, 1], [0, 1], "k--")
        axes[i].set_xlabel("False Positive Rate")
        axes[i].set_ylabel("True Positive Rate")
        axes[i].set_title(f"ROC Curve {i+1}, {hparam_1}={exp_rs[hparam_1]}, {hparam_2}={exp_rs[hparam_2]}")
        axes[i].legend(loc='lower right')
    plt.tight_layout()
    plt.show()
3.0.1 Datasets¶
def split_train_valid(train_dataset, train_ratio):
    """Randomly split a dataset into standalone train/validation datasets.

    random_split only yields Subset views, which do not carry the custom
    SVHNDataset API, so each subset's index list is materialized back into
    a full dataset object via `overwrite`.
    """
    total = len(train_dataset)
    n_train = int(train_ratio * total)
    split_sizes = [n_train, total - n_train]
    train_subset, valid_subset = random_split(train_dataset, split_sizes)
    return (train_dataset.overwrite(indices=train_subset.indices),
            train_dataset.overwrite(indices=valid_subset.indices))
3.1 Experiment 1: Optimizer¶
In the standard process of gradient descent, each update is proportional to the negative gradient (first-order derivative) of the loss function with respect to the parameter. In this traditional process, the learning rate is fixed, and it may cause problems.
- Oscillations. If locally, the learning rate is too high, the model will jump around the local minimum.
- Slow convergence. If locally, the learning rate is too low, the model will spend a lot of epochs to converge to a local minimum.
To mitigate these problems, we make the effective update step adaptive by introducing "momentum", a velocity-like term which accumulates past gradients along directions of consistent descent.
- The velocity term is the weighted sum of previous gradients.
- ...such that the update direction does not only rely on the current gradient, but also on previous ones.
The update of velocity is represented as: $$ v_t=\beta v_{t-1} + (1-\beta)\cdot\nabla J(\theta) $$ where $\beta$ is the momentum coefficient. In our experiments, $\beta$ will be fixed to $0.9$.
The update of parameters will be: $$ \theta_{t} = \theta_{t-1}-\eta\cdot v_{t} $$ In this experiment, we focus on the performance of different optimizers, each of which has its own optimized way to update the momentum. We will fix other variables, including transform, epoch number and learning rate, and only adjust the optimizers. There are a few optimizers to be chosen:
- Adaptive Moment Estimation (Adam)
- Stochastic Gradient Descent (SGD)
- Root Mean Square Propagation (RMSprop)
- Adam with Weight Decay (AdamW)
- Adaptive Gradient Algorithm (Adagrad)
- SGD with Momentum and Nesterov Accelerated Gradient
# Universal Train Dataset without splitting
exp1_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))
# Train-Validation Split (80/20)
exp1_train_dataset, exp1_valid_dataset = split_train_valid(exp1_universal_train_dataset, train_ratio=0.8)
# Guard: the split must return real SVHNDataset objects, not Subset views.
if not isinstance(exp1_train_dataset, SVHNDataset) or not isinstance(exp1_valid_dataset, SVHNDataset):
    raise TypeError("CISC3024 Custom Error: The dataset should be an instance of SVHNDataset.")
# Normalize with statistics computed on the training split only.
exp1_mean, exp1_std = exp1_train_dataset.get_meanstd()
exp1_hyperparams = {
    "num_epochs": 25,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp1_mean, std=exp1_std),
        ToTensorV2()
    ])
}
# Inject Transform (datasets were created before the transform existed).
exp1_train_dataset.transform = exp1_hyperparams['transform']
exp1_valid_dataset.transform = exp1_hyperparams['transform']
# Test Dataset reuses the training-set normalization statistics.
exp1_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp1_hyperparams['transform'])
del exp1_universal_train_dataset
print(f"Training Size:{exp1_train_dataset.__len__()}, Validation Size:{exp1_valid_dataset.__len__()}")
print(f"Channel Means: {exp1_mean}\nChannel Stds: {exp1_std}")
Training Size:58605, Validation Size:14652 Channel Means: [0.43772128224372864, 0.44378969073295593, 0.4728474020957947] Channel Stds: [0.19793079793453217, 0.20086902379989624, 0.1968136429786682]
Define changing & non-changing hyper parameters.
# One independent SmallVGG instance per candidate optimizer (six runs).
exp1_models = [SmallVGG().to(device) for _ in range(0,6)]
candidate_optimizers = [
    optim.Adam(exp1_models[0].parameters(), lr=exp1_hyperparams['lr']),
    optim.SGD(exp1_models[1].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9),
    optim.RMSprop(exp1_models[2].parameters(), lr=exp1_hyperparams['lr']),
    optim.AdamW(exp1_models[3].parameters(), lr=exp1_hyperparams['lr'], weight_decay=0.01),
    optim.Adagrad(exp1_models[4].parameters(), lr=exp1_hyperparams['lr']),
    optim.SGD(exp1_models[5].parameters(), lr=exp1_hyperparams['lr'], momentum=0.9, nesterov=True)]
# Sanity check: the six models are distinct objects (no shared parameters).
for model in exp1_models:
    print(id(model), end=", ")
3044296310112, 3043927082032, 3043927083520, 3043927084144, 3043927085008, 3044137279152,
Train, Validation and Test datasets.
Train, Validation and Test Data Loaders.
# Data Loaders (batch size 128; shuffle train/valid, keep test order fixed)
exp1_train_loader = DataLoader(exp1_train_dataset, batch_size=128, shuffle=True)
exp1_valid_loader = DataLoader(exp1_valid_dataset, batch_size=128, shuffle=True)
exp1_test_loader = DataLoader(exp1_test_dataset, batch_size=128, shuffle=False)
Run Experiments
def run_exp1(optimizers, models, hyper_params, train_loader, valid_loader):
    """Train each (optimizer, model) pair under the shared hyper-parameters.

    Returns one record per run carrying the optimizer name, both loss
    curves, and the final model weights.
    """
    experiments = []
    num_epochs = hyper_params['num_epochs']
    for run_id, (optimizer, net) in enumerate(zip(optimizers, models), start=1):
        print(f"Experiment {run_id}. Running experiment on optimizer: {optimizer.__class__.__name__}")
        train_losses, valid_losses = train_and_evaluate(
            net, train_loader, valid_loader,
            hyper_params['criterion'], optimizer, num_epochs)
        experiments.append({
            "optimizer": optimizer.__class__.__name__,
            "others": "same",
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": net.state_dict()
        })
        # Drop per-run references and release cached GPU memory.
        del net, optimizer
        torch.cuda.empty_cache()
    return experiments
# Run all six optimizer experiments and checkpoint the results.
exp1 = run_exp1(candidate_optimizers, exp1_models, exp1_hyperparams, exp1_train_loader, exp1_valid_loader)
# Timestamp (dots stripped) keeps checkpoint filenames unique.
time_str = str(time.time()).replace(".","")
torch.save(exp1, f"./models/exp1_{time_str}.pth")
Load Experiments
Load Experiment objects and plot results.
# Reload the exp-1 checkpoint and evaluate each optimizer on the test split.
exp1_loaded = torch.load("./models/exp1_17305518422052872.pth")
exp1_results = get_experiment_results(exp1_loaded, test_hyperparam_names=["optimizer", "others"], extra_loader=exp1_test_loader)
plot_el(exp1_loaded, ["optimizer", "others"], n_rows=1, n_cols=6)
plot_cm(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
Precision-Recall Curve
Overfitted to inspect training performance. This "Best" does not tell the optimum optimizer.
# PR curves plus per-class metrics for each optimizer run.
exp1_accuracies, exp1_f1s = plot_pr(exp1_results, ["optimizer", "others"], n_rows=1, n_cols=6)
print_metrics(exp1_accuracies, exp1_f1s)
Accuracies: 0.907 0.196 0.906 0.901 0.735 0.196 F1 Score Lists: 0.912 0.935 0.938 0.874 0.921 0.905 0.881 0.906 0.847 0.867 | Avg F1=0.899, Std F1=0.028664446684648247 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 0.888 0.933 0.939 0.876 0.913 0.903 0.878 0.923 0.850 0.865 | Avg F1=0.897, Std F1=0.028470034992916605 0.887 0.935 0.936 0.871 0.914 0.907 0.871 0.908 0.830 0.843 | Avg F1=0.890, Std F1=0.03434799781092271 0.723 0.840 0.802 0.638 0.776 0.700 0.647 0.789 0.515 0.628 | Avg F1=0.706, Std F1=0.09492988307421246 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 Best: 1-th
# ROC curves: per-class first, then macro/micro averages.
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="all", n_rows=1, n_cols=6)
plot_rocauc(exp1_results, ["optimizer", "others"], curve_type="macro_micro", n_rows=1, n_cols=6)
3.2 Experiment 2: Epoch Number and Learning Rate¶
This experiment seeks to find the effect of different combinations of epoch numbers and learning rates on the training & testing performance of the neural network.
3.2.1 Experiment 2-1: Rough Search¶
In this sub-experiment, we perform a rough search on the epochs and learning rate. We promoted four possible values for both parameters: $$ \text{candidate epochs}=\{10, 15, 20, 25\} $$ $$ \text{candidate lr}=\{1.0\times 10^{-3},1.0\times 10^{-4},1.0\times 10^{-5},1.0\times 10^{-6}\} $$
# Universal Train Dataset without splitting
exp2_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"train_32x32.mat"))
# Train & Validation Datasets (80/20 split)
exp2_train_dataset, exp2_valid_dataset = split_train_valid(exp2_universal_train_dataset, train_ratio=0.8)
del exp2_universal_train_dataset # Unload the mill and kill the donkey
# Normalize with statistics computed on the training split only.
exp2_mean, exp2_std = exp2_train_dataset.get_meanstd()
# Fixed settings for experiment 2; Adam was the winner of experiment 1.
exp2_hyperparams = {
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}
# Inject the transform after the split, as in experiment 1.
exp2_train_dataset.transform = exp2_hyperparams['transform']
exp2_valid_dataset.transform = exp2_hyperparams['transform']
# Test Dataset reuses the training-set normalization statistics.
exp2_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset,"test_32x32.mat"), transform=exp2_hyperparams['transform'])
print(f"Training Size:{exp2_train_dataset.__len__()}, Validation Size:{exp2_valid_dataset.__len__()}")
print(f"Channel Means: {exp2_mean}\nChannel Stds: {exp2_std}")
Training Size:58605, Validation Size:14652 Channel Means: [0.4374935030937195, 0.44353100657463074, 0.4726291298866272] Channel Stds: [0.1980431079864502, 0.2009742110967636, 0.19704842567443848]
# Rough grid: four epoch budgets x four learning rates (16 runs total).
candidate_epochs = [10, 15, 20, 25]
candidate_lr = [1e-3, 1e-4, 1e-5, 1e-6]
exp2_train_loader = DataLoader(exp2_train_dataset, batch_size=128, shuffle=True)
exp2_valid_loader = DataLoader(exp2_valid_dataset, batch_size=128, shuffle=True)
exp2_test_loader = DataLoader(exp2_test_dataset, batch_size=128, shuffle=False)
def run_exp2_1(epochs, lr_list, hyper_params, train_loader, valid_loader):
    """Grid-search every (num_epochs, lr) combination with a fresh model.

    Returns one record per combination with both loss curves and the
    final model weights.
    """
    experiments = []
    for run_id, (num_epochs, lr) in enumerate(itertools.product(epochs, lr_list), start=1):
        print(f"Running Exp {run_id}: num_epoch={num_epochs}, lr={lr}")
        net = SmallVGG().to(device)
        optimizer = hyper_params['optimizer'](net.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(
            net, train_loader, valid_loader,
            hyper_params['criterion'], optimizer, num_epochs)
        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": net.state_dict()
        })
        del net, optimizer
    return experiments
# Run the 16-combination rough search and checkpoint the results.
exp2_1 = run_exp2_1(candidate_epochs, candidate_lr, exp2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".","")
torch.save(exp2_1, f"./models/exp2-1_{time_str}.pth")
# Reload a saved checkpoint and evaluate every run on the test split.
exp2_1_loaded = torch.load("./models/exp2-1_17305539358378615.pth")
exp2_1_results = get_experiment_results(exp2_1_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)
3.2.1-1 Epoch-Loss Curve¶
We found that the key to the training performance of a model is the learning rate. Epoch number only controls the progress of training.
From the perspective of learning rate (each column), only the learning rate of $1.0\times 10^{-3}$ shows a sign of convergence under each candidate epochs. With this learning rate, the model even overfitted under experiments with an epoch number over $15$. The best model we conclude from this rough selection is the one with the combination of $\text{num\_epoch}=10\land\text{lr}=1.0\times10^{-3}$. The minimum validation loss is $36.648$ at step $7$, which is the lowest of all $16$ samples. However, this doesn't mean that it is optimal since it may jump over a local minimum.
Moreover, as we inspect the performance on smaller learning rates, we found that they tend to converge only at much later epoch steps. For the learning rate of $1.0\times 10^{-6}$, the learning rate is so low that the model cannot fit even within the full epoch budget.
# 4x4 grid: rows vary the epoch budget, columns vary the learning rate.
plot_el(exp2_1_loaded, ["num_epochs", "lr"], n_rows=4, n_cols=4)
3.2.1-2 Confusion Matrix¶
In this rough search, the confusion matrix varies on different learning rates, and tends to be identical on different epochs.
Under the same epoch number, as the learning rate gets smaller, the confusion matrix gets "blurrier", meaning that the prediction is less accurate overall. Learning rates at or below $1.0\times 10^{-5}$ are too low for the model to converge in a reasonable number of epochs. For the lowest learning rate of $1.0\times 10^{-6}$, the model is not fitted at all. It classifies every number into 1, the most abundant class in the dataset.
# Confusion matrices for all 16 rough-search combinations.
plot_cm(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
3.2.1-3 Precision-Recall Curve¶
From a numerical perspective over the testing performance, the combination of $\text{num\_epoch}=15\land\text{lr}=1.0\times10^{-3}$ gives the highest accuracy of $0.907$, highest average $F_1$ score of $0.916$ and the lowest $F_1$ variance per-class of $0.025$.
# PR curves plus per-class metrics for each grid combination.
exp2_1_accuracies, exp2_1_f1s = plot_pr(exp2_1_results, ["num_epochs", "lr"], n_rows=4, n_cols=4)
print_metrics(exp2_1_accuracies, exp2_1_f1s)
Accuracies: 0.917 0.825 0.336 0.196 0.912 0.865 0.471 0.196 0.907 0.878 0.663 0.196 0.908 0.880 0.713 0.196 F1 Score Lists: 0.918 0.942 0.947 0.889 0.925 0.913 0.900 0.931 0.855 0.868 | Avg F1=0.909, Std F1=0.029051096513454423 0.811 0.904 0.880 0.774 0.838 0.791 0.745 0.860 0.703 0.731 | Avg F1=0.804, Std F1=0.06335319369501424 0.040 0.570 0.378 0.189 0.273 0.087 0.002 0.237 0.026 0.004 | Avg F1=0.180, Std F1=0.17824165219527116 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 0.914 0.934 0.945 0.882 0.932 0.896 0.878 0.919 0.863 0.875 | Avg F1=0.904, Std F1=0.02715514894076909 0.866 0.930 0.912 0.808 0.881 0.829 0.809 0.899 0.794 0.769 | Avg F1=0.850, Std F1=0.05240811428774516 0.154 0.743 0.509 0.321 0.550 0.430 0.122 0.550 0.005 0.080 | Avg F1=0.347, Std F1=0.23453887474256918 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 0.916 0.926 0.937 0.875 0.922 0.905 0.879 0.919 0.858 0.863 | Avg F1=0.900, Std F1=0.027185167062975517 0.875 0.932 0.913 0.830 0.890 0.849 0.841 0.904 0.810 0.815 | Avg F1=0.866, Std F1=0.04073441033554439 0.596 0.770 0.749 0.632 0.679 0.614 0.618 0.699 0.294 0.526 | Avg F1=0.618, Std F1=0.1283615011641036 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 0.904 0.943 0.926 0.881 0.914 0.913 0.876 0.921 0.863 0.850 | Avg F1=0.899, Std F1=0.02860358424237646 0.879 0.924 0.925 0.821 0.886 0.867 0.832 0.905 0.820 0.820 | Avg F1=0.868, Std F1=0.040548747485917386 0.689 0.842 0.773 0.646 0.711 0.672 0.640 0.763 0.460 0.549 | Avg F1=0.674, Std F1=0.10523037482252583 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 Best: 1-th
# ROC curves: per-class first, then macro/micro averages.
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="all", n_rows=4, n_cols=4)
plot_rocauc(exp2_1_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=4, n_cols=4)
3.2.2 Experiment 2-2: Detailed¶
Previous sub-experiment tells that the best combination from all the listed ones is $\text{num\_epoch}=15 \land \text{lr}=1.0\times 10^{-3}$.
This is a rough solution, as it may jump over local minima. We want to find a better learning rate around $1.0\times 10^{-3}$, with an even more detailed distinction between candidate values, so that it may reveal a missing local minimum without using too many epochs.
We conducted an excessive experiment, purposely seeking an overfitting point over the listed candidate learning rates. We do this by setting the epoch number to $50$.
# Same fixed settings as exp 2-1, with the epoch budget pinned to 15.
exp2_2_hyperparams = {
    "num_epoch": 15,
    "criterion": nn.CrossEntropyLoss(),
    "transform": A.Compose([
        A.Normalize(mean=exp2_mean, std=exp2_std),
        ToTensorV2()
    ]),
    "optimizer":optim.Adam,
}
# More detailed candidate learning rates around 1e-3, that is 10e-4:
# eight log-spaced values spanning [1e-4, 5e-3].
exp2_2_candidate_lr = np.geomspace(1e-4, 5e-3, 8)
print(exp2_2_candidate_lr)
[0.0001 0.00017487 0.00030579 0.00053472 0.00093506 0.00163512 0.0028593 0.005 ]
def run_exp2_2(lr_list, hyper_params, train_loader, test_loader):
    """Train one SmallVGG per candidate learning rate and collect the runs.

    Args:
        lr_list: iterable of learning rates to sweep.
        hyper_params: dict with keys 'num_epoch', 'criterion' and 'optimizer'
            (the optimizer entry is a class, instantiated per run).
        train_loader: DataLoader used for training.
        test_loader: DataLoader passed to train_and_evaluate as the
            evaluation loader.

    Returns:
        list of dicts, one per run, holding the run's hyperparameters, the
        train/valid loss histories and the trained model's state_dict.
    """
    experiments = []
    for i, lr in enumerate(lr_list):
        print(f"Running Exp {i+1}: lr={lr}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epoch']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        train_losses, valid_losses = train_and_evaluate(this_model, train_loader, test_loader, criterion, optimizer, num_epochs)
        experiments.append({
            "num_epochs": num_epochs,
            "lr": lr,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })
        del this_model, criterion, optimizer
        # Fix: release cached GPU memory between runs, matching the sibling
        # sweep functions run_exp3_1/run_exp3_2; otherwise the cache keeps
        # growing over the 8-run sweep.
        torch.cuda.empty_cache()
    return experiments
# Run the fine lr sweep, checkpoint the raw records, then reload a specific
# checkpoint and score every run on the held-out test loader.
exp2_2 = run_exp2_2(exp2_2_candidate_lr, exp2_2_hyperparams, exp2_train_loader, exp2_valid_loader)
time_str = str(time.time()).replace(".", "")  # timestamp without the decimal point
torch.save(exp2_2, f"./models/exp2-2_{time_str}.pth")
exp2_2_loaded = torch.load("./models/exp2-2_17305693866477516.pth")
exp2_2_results = get_experiment_results(exp2_2_loaded, test_hyperparam_names=["num_epochs", "lr"], extra_loader=exp2_test_loader)
3.2.2-1 Epoch-Loss Curve¶
For timing issues, an epoch of $15$ is our tolerance line. By inspecting the epoch-loss curve, we found that all the experiments are nearly overfitted around the end of the epochs.
From all the detailed searches, the learning rate of 9.35e-4, yields the lowest validation loss of $38.227$ at step $7$, which is the overfitting point. This minimum is worse than the one produced by the learning rate 1e-3, which is $36.549$. Therefore, for the trade-off of time and performance, we choose the combination of epoch=$15$ and lr=$0.001$ for the following experiments.
# Epoch-loss curves for the 8 fine-grained lr runs (2 rows x 4 cols).
plot_el(exp2_2_loaded, ["num_epochs", "lr"], n_rows=2, n_cols=4)
3.2.2-2 Confusion Matrix¶
At a glance, from the perspective of confusion matrix, the testing performance on unknown data is roughly identical.
# Test-set confusion matrix for each candidate learning rate.
plot_cm(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=4)
3.2.2-3 Precision-Recall Curve¶
By inspecting the evaluation metrics, we found our judgement correct. From all the over-fitted models, the model with learning rate of $9.35\times 10^{-4}$ (the 5th candidate) yields the highest accuracy of $0.912$ and the highest average per-class $F_1$ score of $0.905$. Besides, the per-class $F_1$ score is also less variant under this learning rate, with a standard deviation of $0.029$.
# Precision-recall curves; also collects per-run accuracy and per-class F1,
# which print_metrics summarizes (it reports the best run at the end).
exp2_2_accuracies, exp2_2_f1s = plot_pr(exp2_2_results, ["num_epochs", "lr"], n_rows=2, n_cols=4)
print_metrics(exp2_2_accuracies, exp2_2_f1s)
Accuracies: 0.873 0.885 0.905 0.906 0.912 0.903 0.893 0.882 F1 Score Lists: 0.873 0.929 0.912 0.822 0.888 0.851 0.829 0.901 0.795 0.798 | Avg F1=0.860, Std F1=0.04542290722465954 0.892 0.936 0.916 0.850 0.905 0.854 0.843 0.906 0.819 0.808 | Avg F1=0.873, Std F1=0.04146430928239567 0.908 0.942 0.941 0.874 0.921 0.888 0.857 0.924 0.840 0.839 | Avg F1=0.893, Std F1=0.03748405274950539 0.911 0.942 0.941 0.868 0.923 0.889 0.875 0.917 0.868 0.829 | Avg F1=0.896, Std F1=0.03485676831682514 0.913 0.936 0.943 0.887 0.928 0.912 0.875 0.926 0.852 0.875 | Avg F1=0.905, Std F1=0.029020749926052154 0.901 0.932 0.937 0.880 0.908 0.912 0.873 0.918 0.835 0.841 | Avg F1=0.894, Std F1=0.033859473636702976 0.887 0.926 0.934 0.862 0.900 0.882 0.854 0.915 0.829 0.836 | Avg F1=0.883, Std F1=0.035070141262265965 0.873 0.921 0.919 0.859 0.888 0.880 0.832 0.891 0.814 0.827 | Avg F1=0.870, Std F1=0.03530344586873121 Best: 5-th
3.2.2-4 ROC-AUC Curve¶
The ROC-AUC Curve under all the detailed candidate learning rates are roughly identical.
# ROC curves for experiment 2-2: per-class, then macro/micro averages only.
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="all", n_rows=2, n_cols=4)
plot_rocauc(exp2_2_results, ["num_epochs", "lr"], curve_type="macro_micro", n_rows=2, n_cols=4)
3.3 Experiment 3: Image Augmentation Parameters¶
3.3.1 Experiment 3-1: Rotation Angles and Crop Percentages¶
# Rebuild the SVHN train/valid split for experiment 3, then drop the
# unsplit dataset to free memory.
exp3_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))
exp3_train_dataset, exp3_valid_dataset = split_train_valid(exp3_universal_train_dataset, train_ratio=0.8)
del exp3_universal_train_dataset
# The mean & std here will only be used for experiment 3-1.
exp3_1_mean, exp3_1_std = exp3_train_dataset.get_meanstd()
# Fixed settings for experiment 3-1; lr=1e-3 is the choice carried over from
# experiment 2, and early stopping inside run_exp3_1 bounds the 50-epoch budget.
exp3_1_hyperparams = {
    "num_epochs": 50,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    "transform": A.Compose([
        A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
        ToTensorV2()
    ])
}
# Transform of train dataset will be altered in the experiments; the
# validation and test sets only get normalization.
exp3_valid_dataset.transform = exp3_1_hyperparams['transform']
exp3_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp3_1_hyperparams["transform"])
print(f"Training Size:{exp3_train_dataset.__len__()}, Validation Size:{exp3_valid_dataset.__len__()}")
print(f"Channel Means: {exp3_1_mean}\nChannel Stds: {exp3_1_std}")
Training Size:58605, Validation Size:14652 Channel Means: [0.43757779153461307, 0.443731916543914, 0.47288011100561006] Channel Stds: [0.19818649325776583, 0.20113878491802037, 0.1971410629011666]
# Group 1
candidate_angles = [15, 30, 45, 60]
candidate_crops = [0.08, 0.24, 0.40, 0.60] # Left Boundary (lower bound of the RandomResizedCrop scale range)
# Validation/test loaders are fixed across all 3-1 runs (no augmentation).
exp3_valid_loader = DataLoader(exp3_valid_dataset, batch_size=128, shuffle=False)
exp3_test_loader = DataLoader(exp3_test_dataset, batch_size=128, shuffle=False)
def run_exp3_1(angles, crops, hyper_params, train_dataset, valid_loader):
    """Train one SmallVGG per (rotation angle, crop lower-bound) pair.

    For every combination the training set's transform is swapped in place,
    a fresh model is trained with early stopping, and the run's loss
    histories plus the final weights are recorded.
    """
    experiments = []
    for run_idx, (angle, crop) in enumerate(itertools.product(angles, crops)):
        print(f"Running Exp {run_idx+1}: angles={angle}, crop={crop}")
        model = SmallVGG().to(device)
        epochs = hyper_params['num_epochs']
        loss_fn = hyper_params['criterion']
        opt = hyper_params['optimizer'](model.parameters(), lr=hyper_params['lr'])
        # Augmentation pipeline for this combination: crop, rotate, normalize.
        aug = A.Compose([
            A.RandomResizedCrop(32, 32, scale=(crop, 1.0)),
            A.Rotate(limit=angle),
            A.Normalize(mean=exp3_1_mean, std=exp3_1_std),
            ToTensorV2()
        ])
        print(f"Exp {run_idx+1}: Generating dataset from transform")
        # Swap the transform in place and rebuild the loader for this run.
        train_dataset.transform = aug
        loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
        train_losses, valid_losses = train_and_evaluate(
            model, loader, valid_loader, loss_fn, opt, epochs,
            stop_early_params={
                "min_delta": 0.01,
                "patience": 5
            })
        experiments.append({
            "angle": angle,
            "crop": crop,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": model.state_dict()
        })
        # Drop references and release cached GPU memory before the next run.
        del model, loss_fn, opt
        torch.cuda.empty_cache()
    return experiments
# Run the 4x4 (angle, crop) sweep, checkpoint it, then reload a specific
# checkpoint, evaluate every run on the test loader and plot diagnostics.
exp3_1 = run_exp3_1(candidate_angles, candidate_crops, exp3_1_hyperparams, exp3_train_dataset, exp3_valid_loader)
time_str = str(time.time()).replace(".","")  # timestamp without the decimal point
torch.save(exp3_1, f"./models/exp3-1_{time_str}.pth")
exp3_1_loaded = torch.load("./models/exp3-1_17307349208257582.pth")
exp3_1_results = get_experiment_results(exp3_1_loaded, test_hyperparam_names=["angle", "crop"], extra_loader=exp3_test_loader)
plot_el(exp3_1_loaded, ["angle", "crop"], n_rows=4, n_cols=4)
plot_cm(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
exp3_accuracies, exp3_f1s = plot_pr(exp3_1_results, ["angle", "crop"], n_rows=4, n_cols=4)
print_metrics(exp3_accuracies, exp3_f1s)
Accuracies: 0.909 0.902 0.915 0.926 0.901 0.908 0.920 0.927 0.894 0.889 0.906 0.929 0.894 0.892 0.909 0.918 F1 Score Lists: 0.918 0.936 0.937 0.867 0.934 0.907 0.888 0.901 0.858 0.873 | Avg F1=0.902, Std F1=0.028146078028585732 0.902 0.930 0.923 0.860 0.913 0.913 0.885 0.911 0.857 0.863 | Avg F1=0.896, Std F1=0.02593225413077912 0.913 0.933 0.949 0.905 0.929 0.924 0.878 0.897 0.837 0.891 | Avg F1=0.906, Std F1=0.030528486926295145 0.918 0.951 0.948 0.904 0.937 0.925 0.908 0.938 0.885 0.874 | Avg F1=0.919, Std F1=0.024707654109350188 0.875 0.927 0.926 0.881 0.923 0.910 0.886 0.893 0.842 0.858 | Avg F1=0.892, Std F1=0.027922326916596706 0.893 0.935 0.927 0.879 0.926 0.921 0.888 0.905 0.873 0.863 | Avg F1=0.901, Std F1=0.024139022374309215 0.903 0.948 0.940 0.891 0.938 0.927 0.904 0.918 0.870 0.880 | Avg F1=0.912, Std F1=0.025248490682396124 0.921 0.947 0.947 0.909 0.944 0.927 0.918 0.924 0.885 0.883 | Avg F1=0.920, Std F1=0.02202325661064801 0.895 0.935 0.917 0.862 0.918 0.904 0.878 0.905 0.811 0.803 | Avg F1=0.883, Std F1=0.04265224570562562 0.899 0.927 0.921 0.837 0.911 0.885 0.877 0.883 0.839 0.830 | Avg F1=0.881, Std F1=0.033704954984753885 0.875 0.929 0.944 0.879 0.935 0.910 0.882 0.914 0.836 0.864 | Avg F1=0.897, Std F1=0.03311069193894167 0.903 0.946 0.956 0.912 0.944 0.936 0.912 0.925 0.891 0.897 | Avg F1=0.922, Std F1=0.02147996881433538 0.881 0.923 0.919 0.850 0.918 0.893 0.874 0.889 0.863 0.856 | Avg F1=0.887, Std F1=0.025284746933263623 0.867 0.925 0.922 0.848 0.915 0.898 0.865 0.885 0.849 0.861 | Avg F1=0.884, Std F1=0.02832748402850103 0.889 0.939 0.933 0.880 0.929 0.914 0.884 0.915 0.860 0.866 | Avg F1=0.901, Std F1=0.027214294349668898 0.910 0.946 0.946 0.891 0.919 0.919 0.894 0.931 0.884 0.860 | Avg F1=0.910, Std F1=0.026501694953201833 Best: 12-th
# ROC curves for experiment 3-1: per-class, then macro/micro averages only.
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="all", n_rows=4, n_cols=4)
plot_rocauc(exp3_1_results, ["angle", "crop"], curve_type="macro_micro", n_rows=4, n_cols=4)
3.3.2 Experiment 3-2: Aspect Ratios & Contrast Factors¶
# Experiment 3-2 fixes angle/crop at the best combination from 3-1
# (angle=45, crop=0.6, the 12th run) and varies aspect ratio / contrast.
exp3_2_hyperparams = {
    "num_epochs": 50,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    "crop":0.6,
    "angle":45,
}
class ContrastEnhanceTransform:
    """Randomly scales pixel intensities to emulate contrast enhancement.

    A scalar ``factor`` f yields the sampling range [1/f, f]; a tuple gives
    the range explicitly. Each call draws one factor uniformly from that
    range, multiplies the image by it, clips to [0, 255] and restores the
    input dtype.
    """

    def __init__(self, factor: Union[float, Tuple[float, float]]) -> None:
        if isinstance(factor, tuple):
            self.factor_min, self.factor_max = factor
        else:
            self.factor_min, self.factor_max = 1 / factor, factor

    def __call__(self, img: np.ndarray) -> np.ndarray:
        original_dtype = img.dtype
        scale = random.uniform(self.factor_min, self.factor_max)
        # The multiply promotes to float; clip keeps values in display range,
        # then cast back so downstream transforms see the original dtype.
        scaled = np.clip(img * scale, 0, 255)
        return scaled.astype(original_dtype)
# Group 2
candidate_ratios = [0.25, 0.42, 0.58, 0.75]  # lower bound of the crop aspect-ratio range
candidate_contrast_factors = [1.2, 1.4, 1.6, 1.8]
Control variables: the rotation angle and crop scale are fixed at the best combination from experiment 3-1 (angle $=45$, crop $=0.6$); only the aspect ratio and contrast factor vary in this sub-experiment.
def run_exp3_2(ratios, contrast_factors, hyper_params, train_dataset, valid_dataset):
    """Train one SmallVGG per (aspect ratio, contrast factor) combination.

    For each combination the channel mean/std are recomputed under that
    contrast factor, the train/valid transforms are swapped in place on the
    given datasets, and a fresh model is trained with early stopping.
    Returns a list of per-run records (hyperparameters, losses, state_dict).
    """
    combinations = list(itertools.product(ratios, contrast_factors))
    experiments = []
    for i, combo in enumerate(combinations):
        ratio, cf = combo
        print(f"Running Exp {i+1}: ratio={ratio}, contrast_factor={cf}")
        this_model = SmallVGG().to(device)
        num_epochs = hyper_params['num_epochs']
        lr = hyper_params['lr']
        criterion = hyper_params['criterion']
        optimizer = hyper_params['optimizer'](this_model.parameters(), lr=lr)
        # Define Transform
        # Normalization statistics depend on the contrast factor, so they are
        # recomputed per run instead of reusing the experiment 3-1 stats.
        this_mean, this_std = train_dataset.get_meanstd(contrast_factor=cf)
        this_train_transform = A.Compose([
            # NOTE(review): the lambda closes over the loop variable `cf`
            # (late binding) — safe here only because the transform is used
            # within the same iteration. It is also incompatible with
            # DataLoader multiprocessing workers.
            A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(cf)(img)), # Lambda customized transform block
            A.RandomResizedCrop(32, 32, scale=(hyper_params['crop'], 1.0), ratio=(ratio, 1.0 / ratio)),
            A.Rotate(limit=hyper_params['angle']),
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])
        # Validation sees no augmentation, only the matching normalization.
        this_valid_transform = A.Compose([
            A.Normalize(mean=this_mean, std=this_std),
            ToTensorV2()
        ])
        # Generate Dataset
        print(f"Exp {i+1}: Generating dataset from transform")
        train_dataset.transform = this_train_transform
        valid_dataset.transform = this_valid_transform
        train_loader = DataLoader(train_dataset, batch_size=128, shuffle=True)
        valid_loader = DataLoader(valid_dataset, batch_size=128, shuffle=False)
        # Train Model
        train_losses, valid_losses = train_and_evaluate(this_model,
                                                        train_loader,
                                                        valid_loader,
                                                        criterion,
                                                        optimizer,
                                                        num_epochs,
                                                        stop_early_params={
                                                            "min_delta": 0.01,
                                                            "patience": 5
                                                        })
        experiments.append({
            "ratio": ratio,
            "contrast_factor": cf,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": this_model.state_dict()
        })
        del this_model, criterion, optimizer
        # del train_loader, valid_loader
        torch.cuda.empty_cache()
    return experiments
# Run the (ratio, contrast factor) sweep and checkpoint the raw records.
exp3_2 = run_exp3_2(candidate_ratios, candidate_contrast_factors, exp3_2_hyperparams, exp3_train_dataset, exp3_valid_dataset)
time_str = str(time.time()).replace(".", "")  # timestamp without the decimal point
torch.save(exp3_2, f"./models/exp3-2_{time_str}.pth")
def exp3_2_get_experiment_results(loaded_experiments, test_hyperparam_names, extra_loaders):
    """Evaluate each saved exp3-2 model on its paired evaluation loader.

    Unlike the shared get_experiment_results helper, every experiment here
    gets its own loader because the normalization statistics depend on the
    run's contrast factor.
    """
    key_a, key_b = test_hyperparam_names
    results = []
    for exp, loader in zip(loaded_experiments, extra_loaders):
        scores, truths, preds = get_predictions(exp["model_state_dict"], loader)
        record = {
            key_a: exp[key_a],
            key_b: exp[key_b],
            "true_labels": truths,
            "pred_labels": preds,
            "pred_scores": scores
        }
        results.append(record)
        # Quick sanity print of the first few predictions per run.
        print(f"First 10 true labels: {truths[:10]}")
        print(f"First 10 pred labels: {preds[:10]}")
        print(f"First 5 pred_scores: {scores[:5]}")
    torch.cuda.empty_cache()
    return results
# Each contrast factor needs its own normalization statistics, hence its own
# evaluation loader. NOTE(review): these loaders are built from deep copies
# of the *validation* set, so the "test" metrics for 3-2 are computed on
# validation data — confirm this is intended.
means_stds = [exp3_valid_dataset.get_meanstd(contrast_factor=cf) for cf in candidate_contrast_factors]
_exp3_2_test_loaders = []
for mean_std in means_stds:
    this_mean, this_std = mean_std
    this_transform = A.Compose([
        A.Normalize(mean=this_mean, std=this_std),
        ToTensorV2()
    ])
    this_test_dataset = copy.deepcopy(exp3_valid_dataset)
    this_test_dataset.transform = this_transform
    this_loader = DataLoader(this_test_dataset, batch_size=128, shuffle=False)
    _exp3_2_test_loaders.append(this_loader)
# Repeat the 4 contrast-specific loaders once per ratio so the list lines up
# one-to-one with the 16 (ratio, contrast_factor) experiment records.
exp3_2_test_loaders = []
for _ in candidate_ratios:
    exp3_2_test_loaders += _exp3_2_test_loaders
exp3_2_loaded = torch.load("./models/exp3-2_1730819819555844.pth")
exp3_2_results = exp3_2_get_experiment_results(exp3_2_loaded, test_hyperparam_names=["ratio", "contrast_factor"], extra_loaders=exp3_2_test_loaders)
# Diagnostics for experiment 3-2: loss curves, confusion matrices and
# precision-recall metrics (accuracy + per-class F1 summary).
plot_el(exp3_2_loaded, ["ratio", "contrast_factor"], n_rows=4, n_cols=4)
plot_cm(exp3_2_results, ["ratio", "contrast_factor"], n_rows=4, n_cols=4)
exp3_2_accuracies, exp3_2_f1s = plot_pr(exp3_2_results, ["ratio", "contrast_factor"], n_rows=4, n_cols=4)
print_metrics(exp3_2_accuracies, exp3_2_f1s)
Accuracies: 0.920 0.918 0.917 0.917 0.918 0.911 0.913 0.922 0.914 0.916 0.913 0.913 0.923 0.923 0.917 0.922 F1 Score Lists: 0.894 0.937 0.947 0.909 0.939 0.912 0.896 0.926 0.880 0.912 | Avg F1=0.915, Std F1=0.02067976693819648 0.908 0.934 0.939 0.906 0.934 0.915 0.901 0.925 0.869 0.904 | Avg F1=0.913, Std F1=0.019974054656940087 0.925 0.939 0.929 0.904 0.940 0.916 0.908 0.916 0.852 0.902 | Avg F1=0.913, Std F1=0.02390683851734753 0.890 0.934 0.946 0.898 0.935 0.904 0.907 0.935 0.883 0.894 | Avg F1=0.912, Std F1=0.021508105940274098 0.912 0.936 0.946 0.912 0.930 0.908 0.887 0.922 0.873 0.910 | Avg F1=0.914, Std F1=0.0205745618350115 0.904 0.927 0.939 0.890 0.937 0.900 0.897 0.904 0.866 0.900 | Avg F1=0.907, Std F1=0.02124979354773411 0.912 0.932 0.936 0.888 0.930 0.904 0.889 0.926 0.872 0.898 | Avg F1=0.909, Std F1=0.02080427179136081 0.923 0.937 0.947 0.907 0.930 0.922 0.898 0.922 0.883 0.912 | Avg F1=0.918, Std F1=0.017568940244675372 0.904 0.933 0.936 0.904 0.932 0.908 0.906 0.905 0.872 0.890 | Avg F1=0.909, Std F1=0.019079743671399765 0.919 0.932 0.938 0.901 0.925 0.913 0.903 0.924 0.860 0.908 | Avg F1=0.912, Std F1=0.021048150026752813 0.907 0.933 0.946 0.899 0.938 0.890 0.899 0.920 0.862 0.886 | Avg F1=0.908, Std F1=0.024824526478269996 0.912 0.931 0.944 0.901 0.924 0.894 0.882 0.927 0.864 0.905 | Avg F1=0.908, Std F1=0.0230442781174334 0.920 0.939 0.945 0.910 0.921 0.915 0.906 0.928 0.889 0.912 | Avg F1=0.919, Std F1=0.015365852898010962 0.929 0.942 0.945 0.912 0.928 0.908 0.907 0.936 0.865 0.911 | Avg F1=0.918, Std F1=0.022282154155352875 0.925 0.933 0.943 0.895 0.935 0.903 0.901 0.924 0.871 0.905 | Avg F1=0.914, Std F1=0.021094009578053304 0.927 0.940 0.944 0.896 0.936 0.912 0.920 0.924 0.879 0.910 | Avg F1=0.919, Std F1=0.01913420145325484 Best: 16-th
# ROC curves for experiment 3-2: per-class, then macro/micro averages only.
plot_rocauc(exp3_2_results, ["ratio", "contrast_factor"], curve_type="all", n_rows=4, n_cols=4)
plot_rocauc(exp3_2_results, ["ratio", "contrast_factor"], curve_type="macro_micro", n_rows=4, n_cols=4)
def exp3_2_peek(dataset, hyper_params, index=21642, ratio=1.0, contrast_factor=1.0):
    """Visualize one training sample under the exp3-2 augmentation pipeline.

    Works on a deep copy of ``dataset`` so the original transform is untouched.

    Args:
        dataset: SVHNDataset providing .overwrite() and .get_meanstd().
        hyper_params: dict supplying the fixed 'crop' and 'angle' settings.
        index: sample index to display.
        ratio: lower bound of the aspect-ratio range for RandomResizedCrop.
        contrast_factor: contrast scaling factor; defaults to 1.0 (no change).
            Fix: the previous default of 0 raised ZeroDivisionError inside
            ContrastEnhanceTransform (1 / factor for scalar factors).
    """
    # Fix: use the `dataset` parameter for the length instead of the global
    # exp3_train_dataset, so the helper works on any dataset passed in.
    sample_ds = dataset.overwrite(range(len(dataset)))  # Deep copy of original
    temp_mean, temp_std = sample_ds.get_meanstd(contrast_factor=contrast_factor)
    sample_ds.transform = A.Compose([
        A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)), # Lambda customized transform block
        A.RandomResizedCrop(32, 32, scale=(hyper_params["crop"], 1.0), ratio=(ratio, 1.0 / ratio)),
        A.Rotate(limit=hyper_params["angle"]),
        A.Normalize(mean=temp_mean, std=temp_std),
        ToTensorV2()
    ])
    peek(sample_ds, index=index)
    del sample_ds, temp_mean, temp_std
# Preview the augmented sample at index 9099 with contrast factor 1.2.
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.2)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial(). A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)), # Lambda customized transform block
Peeking data from training set of index 9099. Image Tnesor Size:torch.Size([3, 32, 32])
# Same sample, contrast factor 1.4.
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.4)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial(). A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)), # Lambda customized transform block
Peeking data from training set of index 9099. Image Tnesor Size:torch.Size([3, 32, 32])
# Same sample, contrast factor 1.6.
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.6)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial(). A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)), # Lambda customized transform block
Peeking data from training set of index 9099. Image Tnesor Size:torch.Size([3, 32, 32])
# Same sample, contrast factor 1.8.
exp3_2_peek(exp3_train_dataset, exp3_2_hyperparams, index=9099, ratio=0.75, contrast_factor=1.8)
D:\Temps\temp\ipykernel_87160\1409744167.py:5: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial(). A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(contrast_factor)(img)), # Lambda customized transform block
Peeking data from training set of index 9099. Image Tnesor Size:torch.Size([3, 32, 32])
3.4 Experiment 4: NN Structure¶
3.4.1 NN Structure¶
class Inception(nn.Module):
    """GoogLeNet-style Inception block.

    Four parallel branches whose outputs are concatenated along the channel
    axis, so the block emits ch1x1 + ch3x3 + ch5x5 + pool_proj channels while
    preserving spatial size. Attribute names match the original so saved
    state_dicts keep loading.
    """

    def __init__(self, in_channels: int, ch1x1: int, ch3x3_reduce: int, ch3x3: int,
                 ch5x5_reduce: int, ch5x5: int, pool_proj: int):
        super().__init__()
        # Branch 1: plain 1x1 convolution.
        self.branch1x1 = nn.Conv2d(in_channels, ch1x1, kernel_size=1)
        # Branch 2: 1x1 bottleneck followed by a 3x3 convolution.
        self.branch3x3 = nn.Sequential(
            nn.Conv2d(in_channels, ch3x3_reduce, kernel_size=1),
            nn.Conv2d(ch3x3_reduce, ch3x3, kernel_size=3, padding=1),
        )
        # Branch 3: 1x1 bottleneck followed by a 5x5 convolution.
        self.branch5x5 = nn.Sequential(
            nn.Conv2d(in_channels, ch5x5_reduce, kernel_size=1),
            nn.Conv2d(ch5x5_reduce, ch5x5, kernel_size=5, padding=2),
        )
        # Branch 4: 3x3 max-pool (stride 1 keeps spatial size) + 1x1 projection.
        self.branch_pool = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            nn.Conv2d(in_channels, pool_proj, kernel_size=1),
        )

    def forward(self, x):
        """Run all four branches on x and concatenate along the channel axis."""
        branches = (self.branch1x1, self.branch3x3, self.branch5x5, self.branch_pool)
        return torch.cat([branch(x) for branch in branches], 1)
# Candidate (conv stage, fc stage) layer layouts for experiment 4.
# Entries named '*<n>' with value None are placeholder slots that
# mix_seq_and_act fills with the candidate activation function.
# NOTE: the GoogLeNet layout defines no '*' slots, so the activation
# candidates do not change its architecture.
candidate_seq: List[Tuple[TypingOrderedDict[str, Optional[nn.Module]], TypingOrderedDict[str, Optional[nn.Module]]]] = [
    (OrderedDict([ # first struct: SmallVGG
        ('conv1', nn.Conv2d(3, 8, kernel_size=3, padding=1)),
        ('*1', None),
        ('conv2', nn.Conv2d(8, 16, kernel_size=3, padding=1)),
        ('*2', None),
        ('max1', nn.MaxPool2d(kernel_size=2, stride=2)), # 16x16
        ('conv3', nn.Conv2d(16, 32, kernel_size=3, padding=1)),
        ('*3', None),
        ('conv4', nn.Conv2d(32, 48, kernel_size=3, padding=1)),
        ('*4', None),
        ('max2', nn.MaxPool2d(kernel_size=2, stride=2)), # 8x8
        ('conv5', nn.Conv2d(48, 56, kernel_size=3, padding=1)),
        ('*5', None),
        ('conv6', nn.Conv2d(56, 64, kernel_size=3, padding=1)),
        ('*6', None),
        ('max3', nn.MaxPool2d(kernel_size=2, stride=2)) # 4x4
    ]), OrderedDict([
        ('fc1', nn.Linear(64 * 4 * 4, 512)),
        ('*1', None),
        ('fc2', nn.Linear(512, 256)),
        ('*2', None),
        ('fc3', nn.Linear(256, 10))
    ])),
    (OrderedDict([ # second struct: LeNet-5
        ('conv1', nn.Conv2d(3, 12, kernel_size=5, stride=1, padding=2)),
        ('*1', None),
        ('avg1', nn.AvgPool2d(kernel_size=2, stride=2)),
        ('conv2', nn.Conv2d(12, 32, kernel_size=5)),
        ('*2', None),
        ('avg2', nn.AvgPool2d(kernel_size=2, stride=2)),
    ]), OrderedDict([
        ('fc1', nn.Linear(32 * 6 * 6, 256)), # spatial: 32 -> 16 -> 12 -> 6
        ('*3', None),
        ('fc2', nn.Linear(256, 128)),
        ('*4', None),
        ('fc3', nn.Linear(128, 10))
    ])),
    (OrderedDict([ # third struct: 2012AlexNet
        ('conv1', nn.Conv2d(3, 64, kernel_size=5, stride=1, padding=2)), # 32x32
        ('*1', None),
        ('max1', nn.MaxPool2d(kernel_size=2, stride=2)), # 16x16
        ('conv2', nn.Conv2d(64, 192, kernel_size=5, padding=2)),
        ('*2', None),
        ('max2', nn.MaxPool2d(kernel_size=2, stride=2)), # 8x8
        ('conv3', nn.Conv2d(192, 384, kernel_size=3, padding=1)),
        ('*3', None),
        ('conv4', nn.Conv2d(384, 256, kernel_size=3, padding=1)),
        ('*4', None),
        ('conv5', nn.Conv2d(256, 256, kernel_size=3, padding=1)),
        ('*5', None),
        ('max3', nn.MaxPool2d(kernel_size=2, stride=2)) # 4x4
    ]), OrderedDict([
        ('fc1', nn.Linear(256 * 4 * 4, 4096)), # 256 * 4 * 4 = 4096
        ('*6', None),
        ('dropout1', nn.Dropout()),
        ('fc2', nn.Linear(4096, 4096)),
        ('*7', None),
        ('dropout2', nn.Dropout()),
        ('fc3', nn.Linear(4096, 10))
    ])),
    (OrderedDict([ # fourth struct: 2014GoogLeNet
        ('conv1', nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)),
        ('max1', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ('conv2', nn.Conv2d(64, 64, kernel_size=1)),
        ('conv3', nn.Conv2d(64, 192, kernel_size=3, padding=1)),
        ('max2', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        # Inception modules
        ('inception3a', Inception(192, 64, 96, 128, 16, 32, 32)),
        ('inception3b', Inception(256, 128, 128, 192, 32, 96, 64)),
        ('max3', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ('inception4a', Inception(480, 192, 96, 208, 16, 48, 64)),
        ('inception4b', Inception(512, 160, 112, 224, 24, 64, 64)),
        ('inception4c', Inception(512, 128, 128, 256, 24, 64, 64)),
        ('inception4d', Inception(512, 112, 144, 288, 32, 64, 64)),
        ('inception4e', Inception(528, 256, 160, 320, 32, 128, 128)),
        ('max4', nn.MaxPool2d(kernel_size=3, stride=2, padding=1)),
        ('inception5a', Inception(832, 256, 160, 320, 32, 128, 128)),
        ('inception5b', Inception(832, 384, 192, 384, 48, 128, 128)),
        ('avg1', nn.AdaptiveAvgPool2d((1, 1))),
        ('dropout1', nn.Dropout(0.4)),
    ]), OrderedDict([
        ('fc1', nn.Linear(1024, 10))
    ]))
]
def mix_seq_and_act(seq: Tuple[TypingOrderedDict, TypingOrderedDict],
                    activation_func: nn.Module) -> Tuple[nn.Sequential, nn.Sequential]:
    """
    Replace all layers whose names start with '*' with a copy of the selected
    activation function and build the two Sequential stages.

    Fix: the layer dicts in candidate_seq are built once at module level, so
    the nn.Module instances inside them are shared between calls. The former
    shallow `.copy()` therefore reused the *same* (already trained) layer
    objects across experiment runs — each run silently continued from the
    previous run's weights instead of starting fresh. Deep-copying gives every
    call its own modules, all starting from the one original initialization.

    Args:
        seq: (conv_layers, fc_layers) ordered dicts; None values under
            '*'-prefixed names mark activation slots.
        activation_func: activation module copied into each slot.

    Returns:
        (conv nn.Sequential, fc nn.Sequential) built from independent copies.
    """
    conv_seq = copy.deepcopy(seq[0])
    fc_seq = copy.deepcopy(seq[1])
    for stage in (conv_seq, fc_seq):
        for name in stage:
            if name.startswith('*'):
                # Fresh activation per slot: stateless anyway, but this avoids
                # one module object appearing at several points of the graph.
                stage[name] = copy.deepcopy(activation_func)
    return nn.Sequential(conv_seq), nn.Sequential(fc_seq)
# Human-readable names aligned index-by-index with candidate_seq.
candidate_seq_name = ['SmallVGG', 'LeNet-5', '2012AlexNet', '2014GoogLeNet', ]
# Activation candidates; mix_seq_and_act places them into the '*' slots.
candidate_activation_func: List[nn.Module] = [nn.ReLU(), nn.ELU(), nn.LeakyReLU(), nn.SiLU()]
# Experiment 4 data: fresh 80/20 split; normalization stats are computed
# under contrast factor 1.8 (the value carried over from experiment 3-2).
exp4_universal_train_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "train_32x32.mat"))
exp4_train_dataset, exp4_valid_dataset = split_train_valid(exp4_universal_train_dataset, train_ratio=0.8)
exp4_mean, exp4_std = exp4_train_dataset.get_meanstd(contrast_factor=1.8)
# Augmentation is fixed; only architecture and activation vary in exp 4.
# NOTE(review): Rotate(limit=15) differs from the angle=45 selected in
# experiment 3-1 — confirm this is intentional.
exp4_1_hyperparams = {
    "num_epochs": 100,
    "lr": 1e-3,
    "criterion": nn.CrossEntropyLoss(),
    "optimizer": optim.Adam,
    "train_transform": A.Compose([
        A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(1.8)(img)),
        A.RandomResizedCrop(32, 32, scale=(0.6, 1.0), ratio=(0.75, 1.0 / 0.75)),
        A.Rotate(limit=15),
        A.Normalize(mean=exp4_mean, std=exp4_std),
        ToTensorV2()
    ]),
    "valid_transform": A.Compose([
        A.Normalize(mean=exp4_mean, std=exp4_std),
        ToTensorV2()
    ])
}
exp4_train_dataset.transform = exp4_1_hyperparams["train_transform"]
exp4_valid_dataset.transform = exp4_1_hyperparams["valid_transform"]
exp4_test_dataset = SVHNDataset(mat_file=os.path.join(path_dataset, "test_32x32.mat"), transform=exp4_1_hyperparams["valid_transform"])
print(f"Training Size:{exp4_train_dataset.__len__()}, Validation Size:{exp4_valid_dataset.__len__()}")
print(f"Channel Means:{exp4_mean}\nChannel Stds:{exp4_std}")
D:\Temps\temp\ipykernel_87160\2006685800.py:13: UserWarning: Using lambda is incompatible with multiprocessing. Consider using regular functions or partial(). A.Lambda(image=lambda img, **kwargs: ContrastEnhanceTransform(1.8)(img)),
Training Size:58605, Validation Size:14652 Channel Means:[0.5003052918497749, 0.5060428476243829, 0.5373523839665758] Channel Stds:[0.2596073596983816, 0.2605003081414301, 0.2568807907866938]
# Loaders are fixed across all experiment 4 runs.
exp4_train_loader = DataLoader(exp4_train_dataset, batch_size=128, shuffle=True)
exp4_valid_loader = DataLoader(exp4_valid_dataset, batch_size=128, shuffle=False)
exp4_test_loader = DataLoader(exp4_test_dataset, batch_size=128, shuffle=False)
def run_exp4_1(sequence_with_name: Tuple[List[str], List[Tuple]],
               activations: List,
               hyper_params: Dict[str, Any],
               train_loader: DataLoader,
               valid_loader: DataLoader) -> List[Dict[str, Union[List[float], dict, float, int]]]:
    """Train one model per (architecture, activation function) combination.

    Each run rebuilds a SmallVGG shell, swaps in the candidate conv/fc
    stages with the candidate activation, trains with early stopping and
    records the loss histories plus the trained weights.
    """
    # Materialize once: sequence_with_name may be a single-use zip.
    combos = list(itertools.product(sequence_with_name, activations))
    experiments = []
    # `act_fn` avoids shadowing the `activations` parameter, as the original
    # loop variable did.
    for run_idx, ((seq_name, seq), act_fn) in enumerate(combos):
        print(f"Running Exp {run_idx + 1}: shape={seq_name}, activation func={act_fn.__class__.__name__}")
        model = SmallVGG()
        conv_stage, fc_stage = mix_seq_and_act(seq, act_fn)
        model.conv_layers = conv_stage  # new conv_layers
        model.fc_layers = fc_stage      # new fc_layers
        model = model.to(device)
        loss_fn = hyper_params['criterion']
        opt = hyper_params['optimizer'](model.parameters(), lr=hyper_params['lr'])
        print(f"Exp {run_idx + 1}: Generating dataset from transform")
        train_losses, valid_losses = train_and_evaluate(
            model, train_loader, valid_loader, loss_fn, opt,
            hyper_params['num_epochs'],
            stop_early_params={
                "min_delta": 0.01,
                "patience": 5
            })
        experiments.append({
            "shape": seq_name,
            "act_func": act_fn.__class__.__name__,
            "train_losses": train_losses,
            "valid_losses": valid_losses,
            "model_state_dict": model.state_dict()
        })
        del model, loss_fn, opt
        torch.cuda.empty_cache()
    return experiments
# Run the 4x4 (architecture, activation) sweep and checkpoint it.
# The zip is consumed exactly once inside run_exp4_1 (it materializes the
# product of combinations up front).
exp4_1 = run_exp4_1(zip(candidate_seq_name, candidate_seq),
                    candidate_activation_func,
                    exp4_1_hyperparams,
                    exp4_train_loader,
                    exp4_valid_loader)
time_str = str(time.time()).replace(".", "")  # timestamp without the decimal point
torch.save(exp4_1, f"./models/exp4-1_{time_str}.pth")
def exp4_1_get_experiment_results(loaded_experiments,
                                  sequence_with_name: Tuple[List[str], List[Tuple]],
                                  activations: List,
                                  test_hyperparam_names: Dict[str, Any],
                                  extra_loader: DataLoader):
    """Rebuild every exp4-1 model from its saved weights and score it.

    The same candidate lists used for training must be passed again so each
    state_dict is loaded into a model with the matching architecture.

    Returns:
        list of dicts with the two hyperparameter values plus true labels,
        predicted labels and softmax scores on extra_loader.
    """
    combinations = list(itertools.product(sequence_with_name, activations))
    experiment_results = []
    n1, n2 = test_hyperparam_names
    for combo, exp in zip(combinations, loaded_experiments):
        (seq_name, seq), act_fn = combo
        this_model = SmallVGG()
        conv, fc = mix_seq_and_act(seq, act_fn)
        this_model.conv_layers = conv
        this_model.fc_layers = fc
        this_model.load_state_dict(exp["model_state_dict"])
        this_model = this_model.to(device)
        # Fix: switch to inference mode. Without this the Dropout layers in
        # the AlexNet/GoogLeNet variants stay active during evaluation,
        # making predictions stochastic and systematically worse.
        this_model.eval()
        pred_scores = []
        true_labels = []
        pred_labels = []
        with torch.no_grad():
            for images, labels in tqdm(extra_loader):
                images, labels = images.to(device), labels.to(device)
                outputs = this_model(images)
                pred_scores_batch = nn.functional.softmax(outputs, dim=-1)
                pred_scores.extend(pred_scores_batch.cpu().tolist())
                pred_labels.extend(outputs.argmax(dim=1).tolist())
                true_labels.extend(labels.cpu().tolist())
        experiment_results.append({
            n1: exp[n1],
            n2: exp[n2],
            "true_labels": true_labels,
            "pred_labels": pred_labels,
            "pred_scores": pred_scores
        })
        # Quick sanity print of the first few predictions per run.
        print(f"First 10 true labels: {true_labels[:10]}")
        print(f"First 10 pred_labels: {pred_labels[:10]}")
        print(f"First 5 pred_scores: {pred_scores[:5]}")
    torch.cuda.empty_cache()
    return experiment_results
# Reload the checkpointed sweep and evaluate on the test set; the candidate
# lists are passed again so each state_dict meets the matching architecture.
exp4_1_loaded = torch.load("./models/exp4-1_17308362503569772.pth")
exp4_1_results = exp4_1_get_experiment_results(exp4_1_loaded,
                                               zip(candidate_seq_name, candidate_seq),
                                               candidate_activation_func,
                                               test_hyperparam_names=["shape", "act_func"],
                                               extra_loader=exp4_test_loader)
plot_el(exp4_1_loaded, ["shape", "act_func"], n_rows=4, n_cols=4)
plot_cm(exp4_1_results, ["shape", "act_func"], n_rows=4, n_cols=4)
exp4_1_accuracies, exp4_1_f1s = plot_pr(exp4_1_results, ["shape", "act_func"], n_rows=4, n_cols=4)
print_metrics(exp4_1_accuracies, exp4_1_f1s)
Accuracies: 0.915 0.932 0.924 0.937 0.903 0.901 0.910 0.918 0.196 0.078 0.078 0.196 0.925 0.925 0.925 0.925 F1 Score Lists: 0.930 0.934 0.935 0.889 0.928 0.917 0.893 0.916 0.867 0.893 | Avg F1=0.910, Std F1=0.02214185752235704 0.924 0.950 0.952 0.909 0.948 0.936 0.912 0.934 0.892 0.909 | Avg F1=0.927, Std F1=0.019710223823189963 0.932 0.942 0.943 0.893 0.941 0.924 0.907 0.924 0.875 0.903 | Avg F1=0.918, Std F1=0.021909506799942538 0.940 0.951 0.957 0.918 0.948 0.937 0.922 0.937 0.904 0.910 | Avg F1=0.932, Std F1=0.01715071048985564 0.894 0.929 0.931 0.873 0.923 0.905 0.873 0.901 0.856 0.870 | Avg F1=0.895, Std F1=0.025416825296858667 0.891 0.922 0.934 0.877 0.908 0.902 0.882 0.905 0.857 0.851 | Avg F1=0.893, Std F1=0.025177077858284696 0.904 0.936 0.932 0.881 0.921 0.911 0.888 0.911 0.867 0.871 | Avg F1=0.902, Std F1=0.023355308505421304 0.908 0.939 0.947 0.883 0.937 0.922 0.901 0.911 0.885 0.873 | Avg F1=0.911, Std F1=0.024231694376209662 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.144 0.000 0.000 | Avg F1=0.014, Std F1=0.0431902452937821 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.144 0.000 0.000 | Avg F1=0.014, Std F1=0.04318562618088482 0.000 0.328 0.000 0.000 0.000 0.000 0.000 0.000 0.000 0.000 | Avg F1=0.033, Std F1=0.09827503131926378 0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584 0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584 0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584 0.897 0.955 0.950 0.894 0.953 0.939 0.897 0.931 0.885 0.855 | Avg F1=0.915, Std F1=0.0328550328555584 Best: 4-th
# ROC curves for experiment 4-1: per-class, then macro/micro averages only.
plot_rocauc(exp4_1_results, ["shape", "act_func"], curve_type="all", n_rows=4, n_cols=4)
plot_rocauc(exp4_1_results, ["shape", "act_func"], curve_type="macro_micro", n_rows=4, n_cols=4)